\documentclass[a4paper]{article}
\usepackage{amsmath , amssymb, amsthm}
\title{Algorithm}
\date{}
\begin{document}
\maketitle
\begin{equation}
  \label{eq:model}
  \mathbf{y_i} = \mathbf{X_i}\mathbf{\beta} + \mathbf{Z_i}\mathbf{b_i} + \mathbf{\epsilon_i}
\end{equation}
where $\mathbf{b_i}\sim N(\mathbf{0},\mathbf{\Psi})$ and $\mathbf{\epsilon_i}\sim N(\mathbf{0},\sigma^2\mathbf{I})$, $i = 1,...,M$ \newline
for our case $\mathbf{\Psi}=\sigma_b^2\mathbf{R}$ \newline
For $\frac{\mathbf{\Psi^{-1}}}{1/\sigma^2}$, do Cholesky decomposition:
\begin{alignat}{2}
\frac{\mathbf{\Psi^{-1}}}{1/\sigma^2} &= \sigma^2\mathbf{\Psi^{-1}}& &= \frac{\sigma^2}{\sigma_b^2}\mathbf{R^{-1}}\nonumber\\
&= \mathbf{\Delta^T}\mathbf{\Delta}& &= \left(\frac{\sigma}{\sigma_b}\mathbf{\Delta_1}\right)^T\left(\frac{\sigma}{\sigma_b}\mathbf{\Delta_1}\right)
\end{alignat}

The log likelihood function is
\begin{align}
  L(\mathbf{\beta},\mathbf{\theta},\sigma^2|\mathbf{y}) &= \prod^{M}_{i=1}f(\mathbf{y_i}|\mathbf{\beta},\mathbf{\theta},\sigma^2) \nonumber\\
  &=\prod^{M}_{i=1}\int f(\mathbf{y_i}|\mathbf{b_i},\mathbf{\beta},\sigma^2)f(\mathbf{b_i}|\mathbf{\theta},\sigma^2)d\mathbf{b_i}
\end{align}
where
\begin{equation}
  \label{eq:ygivenb}
  f(\mathbf{y_i}|\mathbf{b_i},\mathbf{\beta},\sigma^2) = \frac{1}{(2\pi\sigma^2)^{n_i/2}}exp(-\frac{\|\mathbf{y_i}-\mathbf{X_i}\beta-\mathbf{Z_i}\mathbf{b_i}\|^2}{2\sigma^2})   
\end{equation}
and
\begin{align}
  \label{eq:bi}
  f(\mathbf{b_i}|\mathbf{\theta},\sigma^2) &= \frac{1}{(2\pi)^{q/2}\sqrt{|\mathbf{\Psi}|}}exp(-\mathbf{b_i}^T\mathbf{\Psi}^{-1}\mathbf{b_i})\nonumber\\
  &= \frac{1}{(2\pi\sigma^2)^{q/2}abs|\mathbf{\Delta}|^{-1}}exp(-\frac{\|\mathbf{\Delta}\mathbf{b_i}\|^2}{2\sigma^2})
\end{align}

Combining \eqref{eq:ygivenb} and \eqref{eq:bi} gives:
\begin{align}
  \label{eq:likelihood}
 L(\mathbf{\beta},\mathbf{\theta},\sigma^2|\mathbf{y}) &= \prod^{M}_{i=1}\frac{abs|\mathbf{\Delta}|}{(2\pi\sigma^2)^{n_i/2}}\int\frac{exp[-(\|\mathbf{y_i}-\mathbf{X_i}\beta-\mathbf{Z_i}\mathbf{b_i}\|^2 + \|\mathbf{\Delta}\mathbf{b_i}\|^2)/2\sigma^2]}{(2\pi\sigma^2)^{q/2}}d\mathbf{b_i}\nonumber\\  
& = \prod^{M}_{i=1}\frac{abs|\mathbf{\Delta}|}{(2\pi\sigma^2)^{n_i/2}}\int\frac{exp(-\|\mathbf{\tilde{y}_i}-\mathbf{\tilde{X}_i}\beta-\mathbf{\tilde{Z}_i}\mathbf{b_i}\|^2/2\sigma^2)}{(2\pi\sigma^2)^{q/2}}d\mathbf{b_i}
\end{align}

where
\begin{equation}
  \label{eq:yxztilde}
  \mathbf{\tilde{y}_i}= 
  \begin{bmatrix}
    \mathbf{y_i}\\
    \mathbf{0}
  \end{bmatrix} 
,\hspace{0.5cm}
 \mathbf{\tilde{X}_i}= 
\begin{bmatrix}
   \mathbf{X_i}\\
  \mathbf{0}
  \end{bmatrix}
,\hspace{0.5cm}
 \mathbf{\tilde{Z}_i}= 
\begin{bmatrix}
  \mathbf{Z_i}\\
 \mathbf{\Delta}
 \end{bmatrix}
\end{equation}

For $\mathbf{\tilde{Z}_i}$, do QR decomposition:
\begin{equation}
  \label{eq:qr}
  \mathbf{\tilde{Z_i}} = \mathbf{Q_{(i)}}
  \begin{bmatrix}
    \mathbf{R_{11(i)}}\\
    \mathbf{0}
  \end{bmatrix}
\end{equation}

Then,
\begin{align}
  \label{eq:qr1}
  \|\mathbf{\tilde{y}_i}-\mathbf{\tilde{X_i}}\beta-\mathbf{\tilde{Z_i}}\mathbf{b_i}\|^2 &= \|\mathbf{Q_{(i)}^T}(\mathbf{\tilde{y_i}}-\mathbf{\tilde{X_i}}\beta-\mathbf{\tilde{Z_i}}\mathbf{b_i})\|^2 \nonumber \\
&= \|\mathbf{Q_{(i)}^T}\mathbf{\tilde{y_i}}-\mathbf{Q_{(i)}^T}\mathbf{\tilde{X_i}}\beta-\mathbf{Q_{(i)}^T}\mathbf{\tilde{Z_i}}\mathbf{b_i})\|^2 \nonumber \\
&= \| 
\begin{bmatrix}
\mathbf{C_{1(i)}}\\
\mathbf{C_{0(i)}}
\end{bmatrix}
-
\begin{bmatrix}
  \mathbf{R_{10(i)}}\\
  \mathbf{R_{00(i)}}
\end{bmatrix}
\mathbf{\beta}
-
\begin{bmatrix}
  \mathbf{R_{11(i)}}\\
  \mathbf{0}
\end{bmatrix}
\|^2  \nonumber \\
&= \|\mathbf{C_{1(i)}}-\mathbf{R_{10(i)}}\beta-\mathbf{R_{11(i)}}\mathbf{b_i}\|^2 + \|\mathbf{C_{0(i)}} - \mathbf{R_{00(i)}\mathbf{\beta}}\|^2  
\end{align}

It can be proved that
\begin{align}
  \label{eq:book216}
  &\int\frac{exp(\|\mathbf{C_{1(i)}}-\mathbf{R_{10(i)}}\beta-\mathbf{R_{11(i)}}\mathbf{b_i}\|^2/2\sigma^2)}{(2\pi\sigma^2)^{q/2}}d\mathbf{b_i} \nonumber \\
  &=\frac{1}{abs|\mathbf{R_{11(i)}}|}\int\frac{exp(-\|\mathbf{\phi_i}\|^2)}{(2\pi)^{q/2}}d\mathbf{\phi_i} \nonumber \\
  &=\frac{1}{abs|\mathbf{R_{11(i)}}|}
\end{align}
where $\mathbf{\phi_i}=(\mathbf{C_{1(i)}}-\mathbf{R_{10(i)}}\beta-\mathbf{R_{11(i)}}\mathbf{b_i})/\sigma$


Substituting \eqref{eq:book216} and \eqref{eq:qr1} into \eqref{eq:likelihood}, the loglikelihood function becomes:

\begin{align}
  \label{eq:likelihood1}
  L(\mathbf{\beta},\mathbf{\theta},\sigma^2|\mathbf{y}) &= \prod^{M}_{i=1}\frac{exp[-\|\mathbf{C_{0(i)}} - \mathbf{R_{00(i)}\mathbf{\beta}}\|^2/2\sigma^2]}{(2\pi\sigma^2)^{n_i/2}}\text{abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11(i)}}|} \right) \nonumber \\
&=\frac{exp[-\sum_{i=1}^M\|\mathbf{C_{0(i)}} - \mathbf{R_{00(i)}\mathbf{\beta}}\|^2/2\sigma^2]}{(2\pi\sigma^2)^{N/2}}\prod^{M}_{i=1}\text{abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11(i)}}|} \right) 
\end{align}

Then stack $R_{00(i)}$ and $C_{0(i)}$ and do the second QR decomposition:
\begin{equation}
  \label{eq:qr2}
  \begin{bmatrix}
    \mathbf{R_{00(1)}} & \mathbf{C_{0(1)}}\\
    \vdots &  \vdots \\
    \mathbf{R_{00(M)}} & \mathbf{\mathbf{C_{0(M)}}}
  \end{bmatrix}
  =
  \mathbf{Q_0}
  \begin{bmatrix}
    \mathbf{R_{00}} & \mathbf{C_{0}}\\
    \mathbf{0} & \mathbf{\mathbf{C_{-1}}}
  \end{bmatrix}
\end{equation}

which lead to
\begin{align}
  \label{eq:likelihoodfinal}
  &L(\mathbf{\beta},\mathbf{\theta},\sigma^2|\mathbf{y}) \nonumber \\
  &=(2\pi\sigma^2)^{-N/2}exp \left(-\frac{\|\mathbf{C_{-1}}\|^2 + \|\mathbf{C_0} - \mathbf{R_{00}\mathbf{\beta}}\|^2}{2\sigma^2}\right) \prod^{M}_{i=1}\text{abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11(i)}}|} \right) 
\end{align}

For a given $\mathbf{\theta}$, the values of $\mathbf{\beta}$ and $\sigma^2$ that maximize \eqref{eq:likelihoodfinal} are
\begin{equation}
  \label{eq:estimator}
  \mathbf{\hat{\beta}(\theta)}=\mathbf{R_{00}^{-1}}\mathbf{C_0} \quad \text{and} \quad \mathbf{\hat{\sigma}^2(\theta)}=\frac{\|\mathbf{C_{-1}}\|^2}{N}
\end{equation}

which give the profiled likelihood
\begin{align}
\label{eq:profiledlikelihood}
  &L(\mathbf{\hat{\beta}(\theta)},\mathbf{\theta},\hat{\sigma}^2|\mathbf{y}) \nonumber \\
=& \left( \frac{N}{2\pi\|\mathbf{C_{-1}}\|^2} \right)^{N/2} \text{exp} \left(-\frac{N}{2}\right)\prod^{M}_{i=1}\text{abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11(i)}}|} \right)   
\end{align}
or the profiled log-likelihood
\begin{align}
\label{eq:profiledloglikelihood}
\mathit{l}(\mathbf{\theta}|\mathbf{y}) &= \text{log}L(\mathbf{\theta}|\mathbf{y}) \nonumber \\
&= \frac{N}{2}[\text{log}N - \text{log}(2\pi) - 1] - N\text{log}\|\mathbf{C_{-1}}\| + \sum_{i=1}^M\text{log abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11(i)}}|} \right)   
\end{align}
The remaining work is to find $\sigma$ and $\sigma_b$ to maximize the profiled log-likelihood function.

\begin{center}
==========================
\end{center}

For our occasion $\mathbf{\tilde{Z}_i}$ are identical($\mathbf{\tilde{Z}}=\mathbf{\tilde{Z}_i}$), so $\mathbf{R_{11(i)}}$ are the same($\mathbf{R_{11}}=\mathbf{R_{11(i)}}$), we only need to do the first QR decomposition once(see equation \eqref{eq:qr}
$  \mathbf{\tilde{Z}} = \mathbf{Q}
  \begin{bmatrix}
    \mathbf{R_{11}}\\
    \mathbf{0}
  \end{bmatrix}
$
) and the last part in \eqref{eq:profiledloglikelihood} become $\text{abs}\left( \frac{|\mathbf{\Delta}|}{|\mathbf{R_{11}}|} \right)^M$.
And then multiply $\mathbf{Q^T}$ with $\mathbf{\tilde{y}_i}$ and $\mathbf{\tilde{X}_i}$ to get $R_{00(i)}$ and $C_{0(i)}$(equation \eqref{eq:qr1}), then do the second QR decomposition after stacking $R_{00(i)}$ and $C_{0(i)}$ into a matrix (equation \eqref{eq:qr2}) to get $\mathbf{C_{-1}}$,$\mathbf{R_{00}}$ and $\mathbf{C_0}$.

The variables in the log-likelihood function is $\sigma$ and $\sigma_b$, consider to use \textit{optim} function in \textit{R} to get the results.
\end{document}